





<!DOCTYPE html>
<html class="writer-html5" lang="zh-CN" >
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Cross Compilation and RPC &mdash; tvm 0.8.dev1982 文档</title>

  <!-- Stylesheets. Each file is linked exactly once. Order matters for the
       cascade: Bootstrap, then the base theme, the gallery styles, the
       Pygments highlighting, and finally the TLCPack overrides. -->
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../_static/gallery.css" type="text/css" />
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/tlcpack_theme.css" type="text/css" />

  <link rel="shortcut icon" href="../_static/tvm-logo-square.png"/>

  <!-- Scripts. documentation_options.js is loaded once so that the
       "documentation_options" id stays unique in the document. -->
  <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
  <script src="../_static/jquery.js"></script>
  <script src="../_static/underscore.js"></script>
  <script src="../_static/doctools.js"></script>
  <script src="../_static/translations.js"></script>
  <script type="text/javascript" src="../_static/js/theme.js"></script>
  <script type="text/javascript" src="../_static/js/tlcpack_theme.js"></script>

  <!-- Document relations: index, search, and the next/previous tutorial pages. -->
  <link rel="index" title="索引" href="../genindex.html" />
  <link rel="search" title="搜索" href="../search.html" />
  <link rel="next" title="编译深度学习模型的快速开始教程" href="relay_quick_start.html" />
  <link rel="prev" title="Optimizing Operators with Auto-scheduling" href="auto_scheduler_matmul_x86.html" />
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    
<!-- Site-wide top bar: logo, primary navigation links and the ASF link menu.
     All attribute values are quoted so that URLs ending in "/" cannot be
     misread together with the closing ">" of the tag. -->
<header class="header">
    <div class="innercontainer">
      <div class="headerInner d-flex justify-content-between align-items-center">
          <div class="headerLogo">
               <a href="https://tvm.apache.org/"><img src="https://tvm.apache.org/assets/images/logo.svg" alt="Apache TVM"></a>
          </div>

          <div id="headMenu" class="headerNav">
            <button type="button" id="closeHeadMenu" class="navCloseBtn"><img src="../_static/img/close-icon.svg" alt="Close"></button>
             <ul class="nav">
                <li class="nav-item">
                   <a class="nav-link" href="https://tvm.apache.org/community">Community</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href="https://tvm.apache.org/download">Download</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href="https://tvm.apache.org/vta">VTA</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href="https://tvm.apache.org/blog">Blog</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href="https://tvm.apache.org/docs">Docs</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href="https://tvmconf.org">Conference</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href="https://github.com/apache/tvm/">Github</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href="https://tvmchinese.github.io/declaration_zh_CN.html">About-Translators</a>
                </li>
             </ul>
               <!-- ASF links inside the header menu; the same set of links is
                    repeated in the Bootstrap dropdown further down. -->
               <div class="responsivetlcdropdown">
                 <button type="button" class="btn-link">
                   ASF
                 </button>
                 <ul>
                     <li>
                       <a href="https://apache.org/">Apache Homepage</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/licenses/">License</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/security/">Security</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/events/current-event">Events</a>
                     </li>
                     <li>
                       <a href="https://www.zhihu.com/column/c_1429578595417563136">Zhihu</a>
                     </li>
                 </ul>
               </div>
          </div>
            <div class="responsiveMenuIcon">
              <button type="button" id="menuBtn" class="btn-menu"><img src="../_static/img/menu-icon.svg" alt="Menu Icon"></button>
            </div>

            <!-- ASF links as a Bootstrap dropdown (data-toggle="dropdown"). -->
            <div class="tlcDropdown">
              <div class="dropdown">
                <button type="button" class="btn-link dropdown-toggle" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
                  ASF
                </button>
                <div class="dropdown-menu dropdown-menu-right">
                  <ul>
                     <li>
                       <a href="https://apache.org/">Apache Homepage</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/licenses/">License</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/security/">Security</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/foundation/thanks.html">Thanks</a>
                     </li>
                     <li>
                       <a href="https://www.apache.org/events/current-event">Events</a>
                     </li>
                     <li>
                       <a href="https://www.zhihu.com/column/c_1429578595417563136">Zhihu</a>
                     </li>
                  </ul>
                </div>
              </div>
          </div>
       </div>
    </div>
 </header>
 
    <nav data-toggle="wy-nav-shift" class="wy-nav-side fixed">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          

          
            <a href="../index.html">
          

          
            
            <img src="../_static/tvm-logo-small.png" class="logo" alt="Logo"/>
          
          </a>

          
            
            
                <div class="version">
                  0.8.dev1982
                </div>
            
          

          
<!-- Documentation search box. The placeholder alone is not an accessible
     name, so the text field also carries an aria-label. The hidden fields
     are submitted to search.html unchanged. -->
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        
        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
              
            
            
              <p class="caption" role="heading"><span class="caption-text">如何开始</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../install/index.html">安装 TVM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contribute/index.html">贡献者指南</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">用户引导</span></p>
<ul class="current">
<li class="toctree-l1 current"><a class="reference internal" href="index.html">User Tutorial</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="introduction.html">介绍</a></li>
<li class="toctree-l2"><a class="reference internal" href="introduction.html#an-overview-of-tvm-and-model-optimization">TVM和模型优化的概述</a></li>
<li class="toctree-l2"><a class="reference internal" href="install.html">安装 TVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="tvmc_command_line_driver.html">使用TVMC编译和优化一个模型</a></li>
<li class="toctree-l2"><a class="reference internal" href="autotvm_relay_x86.html">Compiling and Optimizing a Model with the Python Interface (AutoTVM)</a></li>
<li class="toctree-l2"><a class="reference internal" href="tensor_expr_get_started.html">使用张量表达式来处理运算符</a></li>
<li class="toctree-l2"><a class="reference internal" href="autotvm_matmul_x86.html">Optimizing Operators with Schedule Templates and AutoTVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="auto_scheduler_matmul_x86.html">Optimizing Operators with Auto-scheduling</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">Cross Compilation and RPC</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#build-tvm-runtime-on-device">在设备上构建 TVM 运行时</a></li>
<li class="toctree-l3"><a class="reference internal" href="#set-up-rpc-server-on-device">在设备上设置 RPC 服务器</a></li>
<li class="toctree-l3"><a class="reference internal" href="#declare-and-cross-compile-kernel-on-local-machine">Declare and Cross Compile Kernel on Local Machine</a></li>
<li class="toctree-l3"><a class="reference internal" href="#run-cpu-kernel-remotely-by-rpc">Run CPU Kernel Remotely by RPC</a></li>
<li class="toctree-l3"><a class="reference internal" href="#run-opencl-kernel-remotely-by-rpc">Run OpenCL Kernel Remotely by RPC</a></li>
<li class="toctree-l3"><a class="reference internal" href="#summary">总结</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="relay_quick_start.html">编译深度学习模型的快速开始教程</a></li>
<li class="toctree-l2"><a class="reference internal" href="intro_topi.html">Introduction to TOPI</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../how_to/index.html">How To Guides</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">开发者引导</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../dev/tutorial/index.html">Developer Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dev/how_to/how_to.html">开发者指南</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">架构指南</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../arch/index.html">Design and Architecture</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">主题引导</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../topic/microtvm/index.html">microTVM：裸机使用TVM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../topic/vta/index.html">VTA: Versatile Tensor Accelerator</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">参考指南</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../reference/langref/index.html">语言参考</a></li>
<li class="toctree-l1"><a class="reference internal" href="../reference/api/python/index.html">Python API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../reference/api/links.html">Other APIs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../reference/publications.html">Publications</a></li>
<li class="toctree-l1"><a class="reference internal" href="../genindex.html">索引</a></li>
</ul>

            
          
        </div>
        
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
      
      <nav class="wy-nav-top" aria-label="top navigation" data-toggle="wy-nav-top">
        
            <div class="togglemenu">

            </div>
            <div class="nav-content">
              <!-- tvm -->
              Table of contents
            </div>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        

          




















<!-- Breadcrumb trail (Docs > User Tutorial > current page) followed by a
     link to the reStructuredText source of this page. -->
<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">
      <li><a href="../index.html">Docs</a> <span class="br-arrow">&gt;</span></li>
      <li><a href="index.html">User Tutorial</a> <span class="br-arrow">&gt;</span></li>
      <li>Cross Compilation and RPC</li>
      <li class="wy-breadcrumbs-aside">
        <a href="../_sources/tutorial/cross_compilation_and_rpc.rst.txt" rel="nofollow"> <img src="../_static/img/source.svg" alt="View page source"/></a>
      </li>
  </ul>

  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="sphx-glr-download-link-note admonition note">
<p class="admonition-title">注解</p>
<p>点击 <a class="reference internal" href="#sphx-glr-download-tutorial-cross-compilation-and-rpc-py"><span class="std std-ref">此处</span></a> 获取完整示例代码</p>
</div>
<div class="sphx-glr-example-title section" id="cross-compilation-and-rpc">
<span id="tutorial-cross-compilation-and-rpc"></span><span id="sphx-glr-tutorial-cross-compilation-and-rpc-py"></span><h1>Cross Compilation and RPC<a class="headerlink" href="#cross-compilation-and-rpc" title="永久链接至标题">¶</a></h1>
<p><strong>作者</strong>: <a class="reference external" href="https://github.com/ZihengJiang/">Ziheng Jiang</a>, <a class="reference external" href="https://github.com/merrymercy/">Lianmin Zheng</a></p>
<p>This tutorial introduces cross compilation and remote device
execution with RPC in TVM.</p>
<p>With cross compilation and RPC, you can <strong>compile a program on your
local machine then run it on the remote device</strong>. It is useful when
the remote device's resources are limited, like Raspberry Pi and mobile
platforms. In this tutorial, we will use the Raspberry Pi for a CPU example
and the Firefly-RK3399 for an OpenCL example.</p>
<div class="section" id="build-tvm-runtime-on-device">
<h2>在设备上构建 TVM 运行时<a class="headerlink" href="#build-tvm-runtime-on-device" title="永久链接至标题">¶</a></h2>
<p>第一步是在远程设备上构建TVM运行时。</p>
<div class="admonition note">
<p class="admonition-title">注解</p>
<p>All instructions in both this section and the next section should be
executed on the target device, e.g. Raspberry Pi.  We assume the target
is running Linux.</p>
</div>
<p>Since we do compilation on the local machine, the remote device is only used
for running the generated code. We only need to build the TVM runtime on
the remote device.</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>git clone --recursive https://github.com/apache/tvm tvm
<span class="nb">cd</span> tvm
make runtime -j2
</pre></div>
</div>
<p>After building the runtime successfully, we need to set environment variables
in <code class="code docutils literal notranslate"><span class="pre">~/.bashrc</span></code> file. We can edit <code class="code docutils literal notranslate"><span class="pre">~/.bashrc</span></code>
using <code class="code docutils literal notranslate"><span class="pre">vi</span> <span class="pre">~/.bashrc</span></code> and add the line below (Assuming your TVM
directory is in <code class="code docutils literal notranslate"><span class="pre">~/tvm</span></code>):</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">PYTHONPATH</span><span class="o">=</span><span class="nv">$PYTHONPATH</span>:~/tvm/python
</pre></div>
</div>
<p>要更新环境变量，请执行 <code class="code docutils literal notranslate"><span class="pre">source</span> <span class="pre">~/.bashrc</span></code>。</p>
</div>
<div class="section" id="set-up-rpc-server-on-device">
<h2>在设备上设置 RPC 服务器<a class="headerlink" href="#set-up-rpc-server-on-device" title="永久链接至标题">¶</a></h2>
<p>To start an RPC server, run the following command on your remote device
(which is a Raspberry Pi in this example).</p>
<blockquote>
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python -m tvm.exec.rpc_server --host <span class="m">0</span>.0.0.0 --port<span class="o">=</span><span class="m">9090</span>
</pre></div>
</div>
</div></blockquote>
<p>如果您看到下面的行，则表示RPC服务器在您的设备上成功启动。</p>
<blockquote>
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>INFO:root:RPCServer: <span class="nb">bind</span> to <span class="m">0</span>.0.0.0:9090
</pre></div>
</div>
</div></blockquote>
</div>
<div class="section" id="declare-and-cross-compile-kernel-on-local-machine">
<h2>Declare and Cross Compile Kernel on Local Machine<a class="headerlink" href="#declare-and-cross-compile-kernel-on-local-machine" title="永久链接至标题">¶</a></h2>
<div class="admonition note">
<p class="admonition-title">注解</p>
<p>Now we go back to the local machine, which has a full TVM installed
(with LLVM).</p>
</div>
<p>Here we will declare a simple kernel on the local machine:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>

<span class="kn">import</span> <span class="nn">tvm</span>
<span class="kn">from</span> <span class="nn">tvm</span> <span class="k">import</span> <span class="n">te</span>
<span class="kn">from</span> <span class="nn">tvm</span> <span class="k">import</span> <span class="n">rpc</span>
<span class="kn">from</span> <span class="nn">tvm.contrib</span> <span class="k">import</span> <span class="n">utils</span>

<span class="n">n</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">runtime</span><span class="o">.</span><span class="n">convert</span><span class="p">(</span><span class="mi">1024</span><span class="p">)</span>
<span class="n">A</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">placeholder</span><span class="p">((</span><span class="n">n</span><span class="p">,),</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;A&quot;</span><span class="p">)</span>
<span class="n">B</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">compute</span><span class="p">((</span><span class="n">n</span><span class="p">,),</span> <span class="k">lambda</span> <span class="n">i</span><span class="p">:</span> <span class="n">A</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">+</span> <span class="mf">1.0</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;B&quot;</span><span class="p">)</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">create_schedule</span><span class="p">(</span><span class="n">B</span><span class="o">.</span><span class="n">op</span><span class="p">)</span>
</pre></div>
</div>
<p>Then we cross compile the kernel.
The target should be ‘llvm -mtriple=armv7l-linux-gnueabihf’ for
Raspberry Pi 3B, but we use ‘llvm’ here to make this tutorial runnable
on our webpage building server. See the detailed note in the following block.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">local_demo</span> <span class="o">=</span> <span class="kc">True</span>

<span class="k">if</span> <span class="n">local_demo</span><span class="p">:</span>
    <span class="n">target</span> <span class="o">=</span> <span class="s2">&quot;llvm&quot;</span>
<span class="k">else</span><span class="p">:</span>
    <span class="n">target</span> <span class="o">=</span> <span class="s2">&quot;llvm -mtriple=armv7l-linux-gnueabihf&quot;</span>

<span class="n">func</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">build</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="p">[</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">],</span> <span class="n">target</span><span class="o">=</span><span class="n">target</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;add_one&quot;</span><span class="p">)</span>
<span class="c1"># save the lib at a local temp folder</span>
<span class="n">temp</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">tempdir</span><span class="p">()</span>
<span class="n">path</span> <span class="o">=</span> <span class="n">temp</span><span class="o">.</span><span class="n">relpath</span><span class="p">(</span><span class="s2">&quot;lib.tar&quot;</span><span class="p">)</span>
<span class="n">func</span><span class="o">.</span><span class="n">export_library</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">注解</p>
<p>To run this tutorial with a real remote device, change <code class="code docutils literal notranslate"><span class="pre">local_demo</span></code>
to False and replace <code class="code docutils literal notranslate"><span class="pre">target</span></code> in <code class="code docutils literal notranslate"><span class="pre">build</span></code> with the appropriate
target triple for your device. The target triple might be
different for different devices. For example, it is
<code class="code docutils literal notranslate"><span class="pre">'llvm</span> <span class="pre">-mtriple=armv7l-linux-gnueabihf'</span></code> for Raspberry Pi 3B and
<code class="code docutils literal notranslate"><span class="pre">'llvm</span> <span class="pre">-mtriple=aarch64-linux-gnu'</span></code> for RK3399.</p>
<p>Usually, you can query the target by running <code class="code docutils literal notranslate"><span class="pre">gcc</span> <span class="pre">-v</span></code> on your
device, and looking for the line starting with <code class="code docutils literal notranslate"><span class="pre">Target:</span></code>
(Though it may still be a loose configuration.)</p>
<p>Besides <code class="code docutils literal notranslate"><span class="pre">-mtriple</span></code>, you can also set other compilation options
like:</p>
<ul>
<li><dl class="simple">
<dt>-mcpu=&lt;cpuname&gt;</dt><dd><p>Specify a specific chip in the current architecture to generate code for. By default this is inferred from the target triple and autodetected to the current architecture.</p>
</dd>
</dl>
</li>
<li><dl>
<dt>-mattr=a1,+a2,-a3,…</dt><dd><p>Override or control specific attributes of the target, such as whether SIMD operations are enabled or not. The default set of attributes is set by the current CPU.
To get the list of available attributes, you can do:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>llc -mtriple<span class="o">=</span>&lt;your device target triple&gt; -mattr<span class="o">=</span><span class="nb">help</span>
</pre></div>
</div>
</dd>
</dl>
</li>
</ul>
<p>These options are consistent with <a class="reference external" href="http://llvm.org/docs/CommandGuide/llc.html">llc</a>.
It is recommended to set the target triple and feature set to match the
specific features available on the board, so we can take full advantage of
them.
You can find more details about cross compilation attributes from
<a class="reference external" href="https://clang.llvm.org/docs/CrossCompilation.html">LLVM guide of cross compilation</a>.</p>
</div>
</div>
<div class="section" id="run-cpu-kernel-remotely-by-rpc">
<h2>Run CPU Kernel Remotely by RPC<a class="headerlink" href="#run-cpu-kernel-remotely-by-rpc" title="永久链接至标题">¶</a></h2>
<p>We show how to run the generated CPU kernel on the remote device.
First we obtain an RPC session from the remote device.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">if</span> <span class="n">local_demo</span><span class="p">:</span>
    <span class="n">remote</span> <span class="o">=</span> <span class="n">rpc</span><span class="o">.</span><span class="n">LocalSession</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
    <span class="c1"># The following is my environment, change this to the IP address of your target device</span>
    <span class="n">host</span> <span class="o">=</span> <span class="s2">&quot;10.77.1.162&quot;</span>
    <span class="n">port</span> <span class="o">=</span> <span class="mi">9090</span>
    <span class="n">remote</span> <span class="o">=</span> <span class="n">rpc</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span><span class="n">host</span><span class="p">,</span> <span class="n">port</span><span class="p">)</span>
</pre></div>
</div>
<p>Upload the lib to the remote device, then invoke a device local
compiler to relink them. Now <cite>func</cite> is a remote module object.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">remote</span><span class="o">.</span><span class="n">upload</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="n">func</span> <span class="o">=</span> <span class="n">remote</span><span class="o">.</span><span class="n">load_module</span><span class="p">(</span><span class="s2">&quot;lib.tar&quot;</span><span class="p">)</span>

<span class="c1"># create arrays on the remote device</span>
<span class="n">dev</span> <span class="o">=</span> <span class="n">remote</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span>
<span class="n">a</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">size</span><span class="o">=</span><span class="mi">1024</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">A</span><span class="o">.</span><span class="n">dtype</span><span class="p">),</span> <span class="n">dev</span><span class="p">)</span>
<span class="n">b</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="mi">1024</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">A</span><span class="o">.</span><span class="n">dtype</span><span class="p">),</span> <span class="n">dev</span><span class="p">)</span>
<span class="c1"># the function will run on the remote device</span>
<span class="n">func</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span>
<span class="n">np</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">assert_equal</span><span class="p">(</span><span class="n">b</span><span class="o">.</span><span class="n">numpy</span><span class="p">(),</span> <span class="n">a</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
</pre></div>
</div>
<p>When you want to evaluate the performance of the kernel on the remote
device, it is important to avoid the overhead of network.
<code class="code docutils literal notranslate"><span class="pre">time_evaluator</span></code> returns a remote function that runs the
function <code class="code docutils literal notranslate"><span class="pre">number</span></code> times, measures the cost per run on the remote
device and returns the measured cost. Network overhead is excluded.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">time_f</span> <span class="o">=</span> <span class="n">func</span><span class="o">.</span><span class="n">time_evaluator</span><span class="p">(</span><span class="n">func</span><span class="o">.</span><span class="n">entry_name</span><span class="p">,</span> <span class="n">dev</span><span class="p">,</span> <span class="n">number</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
<span class="n">cost</span> <span class="o">=</span> <span class="n">time_f</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">%g</span><span class="s2"> secs/op&quot;</span> <span class="o">%</span> <span class="n">cost</span><span class="p">)</span>
</pre></div>
</div>
<p class="sphx-glr-script-out">输出:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>3.126e-07 secs/op
</pre></div>
</div>
</div>
<div class="section" id="run-opencl-kernel-remotely-by-rpc">
<h2>Run OpenCL Kernel Remotely by RPC<a class="headerlink" href="#run-opencl-kernel-remotely-by-rpc" title="永久链接至标题">¶</a></h2>
<p>For remote OpenCL devices, the workflow is almost the same as above.
You can define the kernel, upload files, and run via RPC.</p>
<div class="admonition note">
<p class="admonition-title">注解</p>
<p>Raspberry Pi does not support OpenCL, so the following code is tested on
Firefly-RK3399. You may follow this <a class="reference external" href="https://gist.github.com/mli/585aed2cec0b5178b1a510f9f236afa2">tutorial</a>
to set up the OS and OpenCL driver for RK3399.</p>
<p>We also need to build the runtime with OpenCL enabled on the RK3399 board. In the TVM
root directory, execute:</p>
</div>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>cp cmake/config.cmake .
sed -i <span class="s2">&quot;s/USE_OPENCL OFF/USE_OPENCL ON/&quot;</span> config.cmake
make runtime -j4
</pre></div>
</div>
<p>The following function shows how to run an OpenCL kernel remotely:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">run_opencl</span><span class="p">():</span>
    <span class="c1"># NOTE: This is the setting for my rk3399 board. You need to modify</span>
    <span class="c1"># them according to your environment.</span>
    <span class="n">opencl_device_host</span> <span class="o">=</span> <span class="s2">&quot;10.77.1.145&quot;</span>
    <span class="n">opencl_device_port</span> <span class="o">=</span> <span class="mi">9090</span>
    <span class="n">target</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">target</span><span class="o">.</span><span class="n">Target</span><span class="p">(</span><span class="s2">&quot;opencl&quot;</span><span class="p">,</span> <span class="n">host</span><span class="o">=</span><span class="s2">&quot;llvm -mtriple=aarch64-linux-gnu&quot;</span><span class="p">)</span>

    <span class="c1"># create schedule for the above &quot;add one&quot; compute declaration</span>
    <span class="n">s</span> <span class="o">=</span> <span class="n">te</span><span class="o">.</span><span class="n">create_schedule</span><span class="p">(</span><span class="n">B</span><span class="o">.</span><span class="n">op</span><span class="p">)</span>
    <span class="n">xo</span><span class="p">,</span> <span class="n">xi</span> <span class="o">=</span> <span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">B</span><span class="o">.</span><span class="n">op</span><span class="o">.</span><span class="n">axis</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">factor</span><span class="o">=</span><span class="mi">32</span><span class="p">)</span>
    <span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">xo</span><span class="p">,</span> <span class="n">te</span><span class="o">.</span><span class="n">thread_axis</span><span class="p">(</span><span class="s2">&quot;blockIdx.x&quot;</span><span class="p">))</span>
    <span class="n">s</span><span class="p">[</span><span class="n">B</span><span class="p">]</span><span class="o">.</span><span class="n">bind</span><span class="p">(</span><span class="n">xi</span><span class="p">,</span> <span class="n">te</span><span class="o">.</span><span class="n">thread_axis</span><span class="p">(</span><span class="s2">&quot;threadIdx.x&quot;</span><span class="p">))</span>
    <span class="n">func</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">build</span><span class="p">(</span><span class="n">s</span><span class="p">,</span> <span class="p">[</span><span class="n">A</span><span class="p">,</span> <span class="n">B</span><span class="p">],</span> <span class="n">target</span><span class="o">=</span><span class="n">target</span><span class="p">)</span>

    <span class="n">remote</span> <span class="o">=</span> <span class="n">rpc</span><span class="o">.</span><span class="n">connect</span><span class="p">(</span><span class="n">opencl_device_host</span><span class="p">,</span> <span class="n">opencl_device_port</span><span class="p">)</span>

    <span class="c1"># export and upload</span>
    <span class="n">path</span> <span class="o">=</span> <span class="n">temp</span><span class="o">.</span><span class="n">relpath</span><span class="p">(</span><span class="s2">&quot;lib_cl.tar&quot;</span><span class="p">)</span>
    <span class="n">func</span><span class="o">.</span><span class="n">export_library</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
    <span class="n">remote</span><span class="o">.</span><span class="n">upload</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
    <span class="n">func</span> <span class="o">=</span> <span class="n">remote</span><span class="o">.</span><span class="n">load_module</span><span class="p">(</span><span class="s2">&quot;lib_cl.tar&quot;</span><span class="p">)</span>

    <span class="c1"># run</span>
    <span class="n">dev</span> <span class="o">=</span> <span class="n">remote</span><span class="o">.</span><span class="n">cl</span><span class="p">()</span>
    <span class="n">a</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="n">size</span><span class="o">=</span><span class="mi">1024</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">A</span><span class="o">.</span><span class="n">dtype</span><span class="p">),</span> <span class="n">dev</span><span class="p">)</span>
    <span class="n">b</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">nd</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="mi">1024</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">A</span><span class="o">.</span><span class="n">dtype</span><span class="p">),</span> <span class="n">dev</span><span class="p">)</span>
    <span class="n">func</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span>
    <span class="n">np</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">assert_equal</span><span class="p">(</span><span class="n">b</span><span class="o">.</span><span class="n">numpy</span><span class="p">(),</span> <span class="n">a</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;OpenCL test passed!&quot;</span><span class="p">)</span>
</pre></div>
</div>
</div>
<div class="section" id="summary">
<h2>总结<a class="headerlink" href="#summary" title="永久链接至标题">¶</a></h2>
<p>This tutorial provides a walkthrough of the cross compilation and RPC
features in TVM.</p>
<ul class="simple">
<li><p>Set up an RPC server on the remote device.</p></li>
<li><p>Set up the target device configuration to cross compile the kernels on the
local machine.</p></li>
<li><p>Upload and run the kernels remotely via the RPC API.</p></li>
</ul>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorial-cross-compilation-and-rpc-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../_downloads/766206ab8f1fd80ac34d9816cb991a0d/cross_compilation_and_rpc.py"><code class="xref download docutils literal notranslate"><span class="pre">Python</span> <span class="pre">源码下载:</span> <span class="pre">cross_compilation_and_rpc.py</span></code></a></p>
</div>
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../_downloads/f289ca2466fcf79c024068c1f8642bd0/cross_compilation_and_rpc.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">Jupyter</span> <span class="pre">notebook</span> <span class="pre">下载:</span> <span class="pre">cross_compilation_and_rpc.ipynb</span></code></a></p>
</div>
</div>
<p class="sphx-glr-signature"><a class="reference external" href="https://sphinx-gallery.github.io">Gallery generated by Sphinx-Gallery</a></p>
</div>
</div>


           </div>
           
          </div>
          

<footer>

    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="relay_quick_start.html" class="btn btn-neutral float-right" title="编译深度学习模型的快速开始教程" accesskey="n" rel="next">下一个 <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="auto_scheduler_matmul_x86.html" class="btn btn-neutral float-left" title="Optimizing Operators with Auto-scheduling" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> 上一个</a>
      
    </div>

<div id="button" class="backtop"><img src="../_static/img/right.svg" alt="Back to top"/> </div>
<section class="footerSec">
    <div class="footerHeader">
      <ul class="d-flex align-md-items-center justify-content-between flex-column flex-md-row">
        <li class="copywrite d-flex align-items-center">
          <h5 id="copy-right-info">© 2020 Apache Software Foundation | All rights reserved</h5>
        </li>
      </ul>

    </div>

    <ul>
      <li class="footernote">Copyright © 2020 The Apache Software Foundation. Apache TVM, Apache, the Apache feather, and the Apache TVM project logo are either trademarks or registered trademarks of the Apache Software Foundation.</li>
    </ul>

</section>
</footer>
        </div>
      </div>

    </section>

  </div>
  

    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script>
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script>

  <script type="text/javascript">
      // Runs once jQuery reports the DOM as ready and switches on the theme's
      // navigation. NOTE(review): the `true` argument presumably selects the
      // sticky-navigation mode; confirm against the theme's theme.js.
      jQuery(function enableThemeNavigation() {
          var navigation = SphinxRtdTheme.Navigation;
          navigation.enable(true);
      });
  </script>

  
  
    
    <!-- Theme Analytics -->
    <script>
    // Google Analytics (analytics.js) bootstrap.
    //
    // The immediately-invoked function below:
    //   * records the global command-function name ('ga') in
    //     window.GoogleAnalyticsObject;
    //   * defines window.ga, if it is not already defined, as a stub that
    //     pushes each call's arguments onto the ga.q queue;
    //   * stores the current time in ga.l;
    //   * creates a <script> element with async set and src pointing at
    //     analytics.js, and inserts it before the first <script> element
    //     in the document.
    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
    })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

    // Issue a 'create' command for property UA-75982049-2 with the 'auto'
    // option, then a 'send' command for a 'pageview' hit.
    ga('create', 'UA-75982049-2', 'auto');
    ga('send', 'pageview');
    </script>

    
   

</body>
</html>